{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#加载飞桨、Numpy和相关类库\n",
    "import paddle\n",
    "import paddle.fluid as fluid\n",
    "import paddle.fluid.dygraph as dygraph\n",
    "from paddle.fluid.dygraph import Linear\n",
    "import numpy as np\n",
    "import os\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data():\n",
    "    # 从文件导入数据\n",
    "    datafile = 'datasets/housing.data'\n",
    "    data = np.fromfile(datafile, sep=' ')\n",
    "\n",
    "    # 每条数据包括14项，其中前面13项是影响因素，第14项是相应的房屋价格中位数\n",
    "    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \\\n",
    "                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]\n",
    "    feature_num = len(feature_names)\n",
    "\n",
    "    # 将原始数据进行Reshape，变成[N, 14]这样的形状\n",
    "    data = data.reshape([data.shape[0] // feature_num, feature_num])\n",
    "\n",
    "    # 将原数据集拆分成训练集和测试集\n",
    "    # 这里使用80%的数据做训练，20%的数据做测试\n",
    "    # 测试集和训练集必须是没有交集的\n",
    "    ratio = 0.8\n",
    "    offset = int(data.shape[0] * ratio)\n",
    "    training_data = data[:offset]\n",
    "\n",
    "    # 计算train数据集的最大值，最小值，平均值\n",
    "    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \\\n",
    "                                 training_data.sum(axis=0) / training_data.shape[0]\n",
    "    \n",
    "    # 记录数据的归一化参数，在预测时对数据做归一化\n",
    "    global max_values\n",
    "    global min_values\n",
    "    global avg_values\n",
    "    max_values = maximums\n",
    "    min_values = minimums\n",
    "    avg_values = avgs\n",
    "\n",
    "    # 对数据进行归一化处理\n",
    "    for i in range(feature_num):\n",
    "        #print(maximums[i], minimums[i], avgs[i])\n",
    "        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])\n",
    "\n",
    "    # 训练集和测试集的划分比例\n",
    "    training_data = data[:offset]\n",
    "    test_data = data[offset:]\n",
    "    return training_data, test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Regressor(fluid.dygraph.Layer):\n",
    "    def __init__(self, name_scope):\n",
    "        super(Regressor, self).__init__(name_scope)\n",
    "        name_scope = self.full_name()\n",
    "        # 定义一层全连接层，输出维度是1，激活函数为None，即不使用激活函数\n",
    "        self.fc = Linear(input_dim=13, output_dim=1, act=None)\n",
    "    \n",
    "    # 网络的前向计算函数\n",
    "    def forward(self, inputs):\n",
    "        x = self.fc(inputs)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 定义飞桨动态图的工作环境\n",
    "with fluid.dygraph.guard():\n",
    "    # 声明定义好的线性回归模型\n",
    "    model = Regressor(\"Regressor\")\n",
    "    # 开启模型训练模式\n",
    "    model.train()\n",
    "    # 加载数据\n",
    "    training_data, test_data = load_data()\n",
    "    # 定义优化算法，这里使用随机梯度下降-SGD\n",
    "    # 学习率设置为0.01\n",
    "    opt = fluid.optimizer.SGD(learning_rate=0.01, parameter_list=model.parameters())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 0, iter: 0, loss is: [0.05281785]\n",
      "epoch: 0, iter: 20, loss is: [0.05781517]\n",
      "epoch: 0, iter: 40, loss is: [0.02249105]\n",
      "epoch: 1, iter: 0, loss is: [0.09387153]\n",
      "epoch: 1, iter: 20, loss is: [0.02616495]\n",
      "epoch: 1, iter: 40, loss is: [0.07647146]\n",
      "epoch: 2, iter: 0, loss is: [0.0653851]\n",
      "epoch: 2, iter: 20, loss is: [0.03568998]\n",
      "epoch: 2, iter: 40, loss is: [0.10727332]\n",
      "epoch: 3, iter: 0, loss is: [0.03793078]\n",
      "epoch: 3, iter: 20, loss is: [0.03857902]\n",
      "epoch: 3, iter: 40, loss is: [0.05264254]\n",
      "epoch: 4, iter: 0, loss is: [0.02664]\n",
      "epoch: 4, iter: 20, loss is: [0.03910763]\n",
      "epoch: 4, iter: 40, loss is: [0.02433527]\n",
      "epoch: 5, iter: 0, loss is: [0.0186352]\n",
      "epoch: 5, iter: 20, loss is: [0.06699806]\n",
      "epoch: 5, iter: 40, loss is: [0.03903596]\n",
      "epoch: 6, iter: 0, loss is: [0.07308879]\n",
      "epoch: 6, iter: 20, loss is: [0.04727199]\n",
      "epoch: 6, iter: 40, loss is: [0.01989539]\n",
      "epoch: 7, iter: 0, loss is: [0.0130775]\n",
      "epoch: 7, iter: 20, loss is: [0.0333111]\n",
      "epoch: 7, iter: 40, loss is: [0.09860535]\n",
      "epoch: 8, iter: 0, loss is: [0.01305531]\n",
      "epoch: 8, iter: 20, loss is: [0.02581232]\n",
      "epoch: 8, iter: 40, loss is: [0.00456481]\n",
      "epoch: 9, iter: 0, loss is: [0.04312572]\n",
      "epoch: 9, iter: 20, loss is: [0.04629264]\n",
      "epoch: 9, iter: 40, loss is: [0.00864074]\n"
     ]
    }
   ],
   "source": [
    "with dygraph.guard():\n",
    "    EPOCH_NUM = 10   # 设置外层循环次数\n",
    "    BATCH_SIZE = 10  # 设置batch大小\n",
    "    \n",
    "    # 定义外层循环\n",
    "    for epoch_id in range(EPOCH_NUM):\n",
    "        # 在每轮迭代开始之前，将训练数据的顺序随机的打乱\n",
    "        np.random.shuffle(training_data)\n",
    "        # 将训练数据进行拆分，每个batch包含10条数据\n",
    "        mini_batches = [training_data[k:k+BATCH_SIZE] for k in range(0, len(training_data), BATCH_SIZE)]\n",
    "        # 定义内层循环\n",
    "        for iter_id, mini_batch in enumerate(mini_batches):\n",
    "            # 获得当前批次训练数据\n",
    "            x = np.array(mini_batch[:, :-1]).astype('float32')\n",
    "            # 获得当前批次训练标签（真实房价）\n",
    "            y = np.array(mini_batch[:, -1:]).astype('float32')\n",
    "            # 将numpy数据转为飞桨动态图variable形式\n",
    "            house_features = dygraph.to_variable(x)\n",
    "            prices = dygraph.to_variable(y)\n",
    "            \n",
    "            # 前向计算\n",
    "            predicts = model(house_features)\n",
    "            \n",
    "            # 计算损失\n",
    "            loss = fluid.layers.square_error_cost(predicts, label=prices)\n",
    "            avg_loss = fluid.layers.mean(loss)\n",
    "            if iter_id%20==0:\n",
    "                print(\"epoch: {}, iter: {}, loss is: {}\".format(epoch_id, iter_id, avg_loss.numpy()))\n",
    "            \n",
    "            # 反向传播\n",
    "            avg_loss.backward()\n",
    "            # 最小化loss,更新参数\n",
    "            opt.minimize(avg_loss)\n",
    "            # 清除梯度\n",
    "            model.clear_gradients()\n",
    "    # 保存模型\n",
    "    fluid.save_dygraph(model.state_dict(), 'LR_model')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "模型保存成功，模型参数保存在LR_model中\n"
     ]
    }
   ],
   "source": [
    "# 定义飞桨动态图工作环境\n",
    "with fluid.dygraph.guard():\n",
    "    # 保存模型参数，文件名为LR_model\n",
    "    fluid.save_dygraph(model.state_dict(), 'LR_model')\n",
    "    print(\"模型保存成功，模型参数保存在LR_model中\")\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_one_example(data_dir):\n",
    "    f = open(data_dir, 'r')\n",
    "    datas = f.readlines()\n",
    "    # 选择倒数第10条数据用于测试\n",
    "    tmp = datas[-10]\n",
    "    tmp = tmp.strip().split()\n",
    "    one_data = [float(v) for v in tmp]\n",
    "\n",
    "    # 对数据进行归一化处理\n",
    "    for i in range(len(one_data)-1):\n",
    "        one_data[i] = (one_data[i] - avg_values[i]) / (max_values[i] - min_values[i])\n",
    "\n",
    "    data = np.reshape(np.array(one_data[:-1]), [1, -1]).astype(np.float32)\n",
    "    label = one_data[-1]\n",
    "    return data, label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inference result is [[23.601082]], the corresponding label is 19.7\n"
     ]
    }
   ],
   "source": [
    "\n",
    "with dygraph.guard():\n",
    "    # 参数为保存模型参数的文件地址\n",
    "    model_dict, _ = fluid.load_dygraph('LR_model')\n",
    "    model.load_dict(model_dict)\n",
    "    model.eval()\n",
    "\n",
    "    # 参数为数据集的文件地址\n",
    "    test_data, label = load_one_example('./work/housing.data')\n",
    "    # 将数据转为动态图的variable格式\n",
    "    test_data = dygraph.to_variable(test_data)\n",
    "    results = model(test_data)\n",
    "\n",
    "    # 对结果做反归一化处理\n",
    "    results = results * (max_values[-1] - min_values[-1]) + avg_values[-1]\n",
    "    print(\"Inference result is {}, the corresponding label is {}\".format(results.numpy(), label))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
